Libraries¶

In [ ]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
In [ ]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
In [ ]:
import tensorflow as tf
from keras.layers import Dense, SimpleRNN
from keras.layers import InputLayer
from tensorflow import keras
from tensorflow.keras import models, layers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, GlobalMaxPooling1D, Bidirectional
In [ ]:
from sklearn.metrics import confusion_matrix, classification_report
import itertools
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
In [ ]:
pip install lime
Requirement already satisfied: lime in /usr/local/lib/python3.10/dist-packages (0.2.0.1)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from lime) (3.7.1)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from lime) (1.23.5)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from lime) (1.10.1)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from lime) (4.66.1)
Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.10/dist-packages (from lime) (1.2.2)
Requirement already satisfied: scikit-image>=0.12 in /usr/local/lib/python3.10/dist-packages (from lime) (0.19.3)
Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (3.1)
Requirement already satisfied: pillow!=7.1.0,!=7.1.1,!=8.3.0,>=6.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (9.4.0)
Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (2.31.1)
Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (2023.8.12)
Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (1.4.1)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (23.1)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.18->lime) (1.3.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.18->lime) (3.2.0)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (1.1.0)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (4.42.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (1.4.4)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (3.1.1)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (2.8.2)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->lime) (1.16.0)
In [ ]:
from lime import lime_tabular

Functions¶

In [ ]:
# Plotting Model Performance Metrics
def display_training_loss(training, validation):
    """Plot per-epoch training and validation loss on one figure.

    training   -- sequence of training-loss values, one per epoch
    validation -- sequence of validation-loss values, one per epoch
    """
    plt.clf()  # start from an empty figure
    epoch_axis = range(1, len(training) + 1)
    # Blue dots ('bo') for training, solid blue line ('b') for validation.
    plt.plot(epoch_axis, training, 'bo', label='Training loss')
    plt.plot(epoch_axis, validation, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def display_training_accuracy(training, validation):
    """Plot per-epoch training and validation accuracy on one figure.

    training   -- sequence of training-accuracy values, one per epoch
    validation -- sequence of validation-accuracy values, one per epoch
    """
    plt.clf()  # start from an empty figure
    epoch_axis = range(1, len(training) + 1)
    # Blue dots ('bo') for training, solid blue line ('b') for validation.
    plt.plot(epoch_axis, training, 'bo', label='Training acc')
    plt.plot(epoch_axis, validation, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

def print_validation_report(test_labels, predictions):
    """Print a classification report, accuracy, and RMSE for the predictions.

    FIX: the original called `accuracy_score` and `MSE`, neither of which is
    imported anywhere in this notebook, so the last two prints raised
    NameError. Both metrics are computed with numpy here instead.
    """
    print("Classification Report")
    print(classification_report(test_labels, predictions))
    y_true = np.asarray(test_labels).ravel()
    y_pred = np.asarray(predictions).ravel()
    # Accuracy = fraction of exactly matching labels.
    print('Accuracy Score: {}'.format(np.mean(y_true == y_pred)))
    # RMSE treats the integer-encoded class labels as ordinal values.
    print('Root Mean Square Error: {}'.format(np.sqrt(np.mean((y_true - y_pred) ** 2))))

sns.set(font_scale=1)
# NOTE: this definition is shadowed by a second `plot_confusion_matrix`
# (different signature) defined later in the notebook; the later one is the
# version actually called in the evaluation section.
def plot_confusion_matrix(y_true, y_pred):
    """Draw a seaborn heatmap of the confusion matrix for y_true vs. y_pred."""
    mtx = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(16,12))
    sns.heatmap(mtx, annot=True, fmt='d', linewidths=.75,  cbar=False, ax=ax,cmap='Blues',linecolor='white')
    #  square=True,
    plt.ylabel('true label', fontsize=20)
    plt.xlabel('predicted label', fontsize=20)

Import data¶

Robots are smart… by design. To fully understand and properly navigate a task, however, they need input about their environment.

This data comes from Kaggle competition, "CareerCon 2019 - Help Navigate Robots"

We'll help robots recognize the floor surface they’re standing on using data collected from Inertial Measurement Units (IMU sensors).

Researchers have collected IMU sensor data while driving a small mobile robot over different floor surfaces on the university premises. The task is to predict which one of the nine floor types (e.g., carpet, soft tiles, concrete) the robot is on using sensor data such as acceleration and velocity. Succeed and we'll help improve the navigation of robots without assistance across many different surfaces, so they won't fall down on the job.

The data has been collected by Heikki Huttunen and Francesco Lomio from the Department of Signal Processing and Damoon Mohamadi, Kaan Celikbilek, Pedram Ghazi and Reza Ghabcheloo from the Department of Automation and Mechanical Engineering both from Tampere University, Finland.

In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [ ]:
# Raw Kaggle files mounted from Google Drive.
train = pd.read_csv('/content/drive/MyDrive/NU/MSDS458/career-con-2019/X_train.csv')
y = pd.read_csv('/content/drive/MyDrive/NU/MSDS458/career-con-2019/y_train.csv')
# NOTE(review): `test` is read from sample_submission.csv — the same file as
# `sub` below. This was probably meant to be X_test.csv; confirm before using
# `test` as feature data.
test = pd.read_csv('/content/drive/MyDrive/NU/MSDS458/career-con-2019/sample_submission.csv')
sub = pd.read_csv('/content/drive/MyDrive/NU/MSDS458/career-con-2019/sample_submission.csv')

Train/Test Data split¶

https://kimamani89.com/2019/06/09/post-659/

In [ ]:
# Map each surface name to an integer class id for model training.
encode_dic = {'fine_concrete': 0,
              'concrete': 1,
              'soft_tiles': 2,
              'tiled': 3,
              'soft_pvc': 4,
              'hard_tiles_large_space': 5,
              'carpet': 6,
              'hard_tiles': 7,
              'wood': 8}
# The reverse mapping is derived from encode_dic instead of being written out
# by hand, so the two dictionaries can never drift out of sync.
decode_dic = {idx: name for name, idx in encode_dic.items()}
In [ ]:
feature_names = list(train.columns)[3:]
In [ ]:
train.drop(['row_id', "series_id", "measurement_number"], axis=1, inplace=True)
# Reshape the flat rows into sequences: 3810 series x 128 measurements x 10 channels.
train = train.values.reshape((3810, 128, 10))
In [ ]:
#y = y['surface'].map(encode_dic).astype(int)
In [ ]:
#y.head()
In [ ]:
#y = y['surface'].map(encode_dic).astype(int)
# Keep only the surface label, integer-encode it, and shape it (3810, 1)
# to line up with the 3810 series in `train`.
y.drop(["series_id", "group_id"], axis=1, inplace=True)
y = y['surface'].map(encode_dic).astype(int)
y = y.values.reshape((3810, 1))
In [ ]:
x_train, x_test, y_train, y_test = train_test_split(train, y, test_size=0.3, random_state=42)
In [ ]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.3, random_state=42)
In [ ]:
def _decode_labels(encoded):
    """Return a one-column DataFrame of human-readable surface names."""
    frame = pd.DataFrame(data=encoded, columns=y_columns)
    frame['surface'] = frame['surface'].map(decode_dic).astype(str)
    return frame

# The same decode step was copy-pasted three times in the original;
# use one helper so the logic exists in a single place.
y_columns = ['surface']
y_train_df = _decode_labels(y_train)
y_val_df = _decode_labels(y_val)
y_test_df = _decode_labels(y_test)
In [ ]:
# Class distribution of the training labels.
# FIX: the original used value_counts().reset_index().rename(columns={'index': 'target'}),
# which only works on pandas < 2.0 (where reset_index created an 'index' column);
# on pandas >= 2.0 the lookup of df['target'] raises KeyError. Plotting the
# counts Series directly is version-independent and draws the same horizontal
# bar chart (counts on x, surface names on y).
counts = y_train_df['surface'].value_counts()
sns.barplot(x=counts.values, y=counts.index)
plt.title('Counts of surface class')
plt.show()
In [ ]:
# Class distribution of the validation labels.
# FIX: the original's reset_index().rename(columns={'index': 'target'}) only
# works on pandas < 2.0; plotting the counts Series directly is
# version-independent and draws the same chart.
counts = y_val_df['surface'].value_counts()
sns.barplot(x=counts.values, y=counts.index)
plt.title('Counts of surface class')
plt.show()
In [ ]:
# Class distribution of the test labels.
# FIX: the original's reset_index().rename(columns={'index': 'target'}) only
# works on pandas < 2.0; plotting the counts Series directly is
# version-independent and draws the same chart.
counts = y_test_df['surface'].value_counts()
sns.barplot(x=counts.values, y=counts.index)
plt.title('Counts of surface class')
plt.show()

Build Model¶

In [ ]:

In [ ]:
model = models.Sequential()
# Fix the per-sample shape: 128 time steps x 10 sensor channels.
model.add(InputLayer(batch_input_shape=(None, 128, 10)))
# Bidirectional SimpleRNN: 10 units per direction -> 20-dim output (see summary below).
model.add(Bidirectional(SimpleRNN(units=10, activation='relu')))
model.add(Dense(9, activation='softmax')) #softmax is used as the activation function for multi-class classification problems where class membership is required on more than two class labels.
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirectiona  (None, 20)               420       
 l)                                                              
                                                                 
 dense (Dense)               (None, 9)                 189       
                                                                 
=================================================================
Total params: 609
Trainable params: 609
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model, "plot.png", show_shapes=True) # plot a graph of the model
Out[ ]:
In [ ]:
%%time
# The string 'SparseCategoricalCrossentropy' resolves to the Keras loss class;
# labels stay integer-encoded (not one-hot), matching the y arrays above.
model.compile(loss='SparseCategoricalCrossentropy', #Sparse Categorical Crossentropy Loss because data is not one-hot encoded
              optimizer='adam',
              metrics=['accuracy'])

# Keep the History object so the learning curves can be plotted afterwards.
history = model.fit(x_train,
          y_train,
          validation_data=(x_val, y_val),
          epochs=30)
Epoch 1/30
59/59 [==============================] - 11s 142ms/step - loss: 4.5891 - accuracy: 0.1072 - val_loss: 3.0954 - val_accuracy: 0.0924
Epoch 2/30
59/59 [==============================] - 5s 88ms/step - loss: 2.6427 - accuracy: 0.1393 - val_loss: 2.3925 - val_accuracy: 0.1586
Epoch 3/30
59/59 [==============================] - 7s 111ms/step - loss: 2.2873 - accuracy: 0.1710 - val_loss: 2.1985 - val_accuracy: 0.1798
Epoch 4/30
59/59 [==============================] - 7s 117ms/step - loss: 2.1419 - accuracy: 0.1902 - val_loss: 2.1128 - val_accuracy: 0.2010
Epoch 5/30
59/59 [==============================] - 6s 101ms/step - loss: 2.0645 - accuracy: 0.2251 - val_loss: 2.0642 - val_accuracy: 0.2285
Epoch 6/30
59/59 [==============================] - 7s 113ms/step - loss: 2.0180 - accuracy: 0.2401 - val_loss: 2.0302 - val_accuracy: 0.2484
Epoch 7/30
59/59 [==============================] - 6s 108ms/step - loss: 1.9839 - accuracy: 0.2578 - val_loss: 2.0012 - val_accuracy: 0.2609
Epoch 8/30
59/59 [==============================] - 6s 95ms/step - loss: 1.9578 - accuracy: 0.2728 - val_loss: 1.9757 - val_accuracy: 0.2934
Epoch 9/30
59/59 [==============================] - 3s 56ms/step - loss: 1.9357 - accuracy: 0.2803 - val_loss: 1.9544 - val_accuracy: 0.2759
Epoch 10/30
59/59 [==============================] - 2s 39ms/step - loss: 1.9066 - accuracy: 0.2889 - val_loss: 1.9326 - val_accuracy: 0.2784
Epoch 11/30
59/59 [==============================] - 2s 38ms/step - loss: 1.8820 - accuracy: 0.3044 - val_loss: 1.9099 - val_accuracy: 0.2946
Epoch 12/30
59/59 [==============================] - 2s 36ms/step - loss: 1.8536 - accuracy: 0.3076 - val_loss: 1.8867 - val_accuracy: 0.3221
Epoch 13/30
59/59 [==============================] - 2s 37ms/step - loss: 1.8284 - accuracy: 0.3119 - val_loss: 1.8500 - val_accuracy: 0.3159
Epoch 14/30
59/59 [==============================] - 4s 72ms/step - loss: 1.7973 - accuracy: 0.3119 - val_loss: 1.8287 - val_accuracy: 0.3109
Epoch 15/30
59/59 [==============================] - 3s 50ms/step - loss: 1.7718 - accuracy: 0.3242 - val_loss: 1.8184 - val_accuracy: 0.3171
Epoch 16/30
59/59 [==============================] - 2s 41ms/step - loss: 1.7497 - accuracy: 0.3248 - val_loss: 1.7693 - val_accuracy: 0.3196
Epoch 17/30
59/59 [==============================] - 2s 39ms/step - loss: 1.7251 - accuracy: 0.3280 - val_loss: 1.7480 - val_accuracy: 0.3321
Epoch 18/30
59/59 [==============================] - 2s 37ms/step - loss: 1.7072 - accuracy: 0.3387 - val_loss: 1.7481 - val_accuracy: 0.3296
Epoch 19/30
59/59 [==============================] - 3s 53ms/step - loss: 1.8438 - accuracy: 0.3130 - val_loss: 1.7700 - val_accuracy: 0.3084
Epoch 20/30
59/59 [==============================] - 4s 71ms/step - loss: 1.6862 - accuracy: 0.3462 - val_loss: 1.7255 - val_accuracy: 0.3233
Epoch 21/30
59/59 [==============================] - 2s 38ms/step - loss: 1.6688 - accuracy: 0.3446 - val_loss: 1.7149 - val_accuracy: 0.3109
Epoch 22/30
59/59 [==============================] - 2s 41ms/step - loss: 1.6551 - accuracy: 0.3532 - val_loss: 1.7396 - val_accuracy: 0.3171
Epoch 23/30
59/59 [==============================] - 2s 39ms/step - loss: 1.6455 - accuracy: 0.3687 - val_loss: 1.6773 - val_accuracy: 0.3333
Epoch 24/30
59/59 [==============================] - 2s 40ms/step - loss: 1.6698 - accuracy: 0.3537 - val_loss: 1.7011 - val_accuracy: 0.3296
Epoch 25/30
59/59 [==============================] - 4s 71ms/step - loss: 1.6241 - accuracy: 0.3703 - val_loss: 1.6686 - val_accuracy: 0.3383
Epoch 26/30
59/59 [==============================] - 3s 42ms/step - loss: 1.6035 - accuracy: 0.3773 - val_loss: 1.6566 - val_accuracy: 0.3346
Epoch 27/30
59/59 [==============================] - 2s 37ms/step - loss: 1.6234 - accuracy: 0.3569 - val_loss: 1.6776 - val_accuracy: 0.3371
Epoch 28/30
59/59 [==============================] - 2s 36ms/step - loss: 1.5948 - accuracy: 0.3751 - val_loss: 1.6682 - val_accuracy: 0.3271
Epoch 29/30
59/59 [==============================] - 2s 38ms/step - loss: 1.5727 - accuracy: 0.3805 - val_loss: 1.6365 - val_accuracy: 0.3433
Epoch 30/30
59/59 [==============================] - 3s 44ms/step - loss: 1.5628 - accuracy: 0.3741 - val_loss: 1.6230 - val_accuracy: 0.3533
CPU times: user 1min 58s, sys: 1.71 s, total: 2min
Wall time: 2min 24s

Testing the model¶

In [ ]:
test_loss, test_acc = model.evaluate(x_test, y_test)
36/36 [==============================] - 0s 11ms/step - loss: 1.6661 - accuracy: 0.3473
In [ ]:
print(f'test acc: {test_acc}, test loss: {test_loss}')
test acc: 0.34733158349990845, test loss: 1.6660746335983276

Plot the performance¶

In [ ]:
# Metric names recorded by fit(); these keys feed the plotting calls below.
history_dict = history.history
history_dict.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
# Learning curves from the recorded training history.
display_training_loss(history.history['loss'], history.history['val_loss'])
In [ ]:
display_training_accuracy(history.history['accuracy'], history.history['val_accuracy'])

Plot the confusion matrix¶

In [ ]:
def plot_confusion_matrix(truth, pred, classes, normalize=False, title=''):
    """Plot an annotated confusion matrix with matplotlib.

    truth     -- true class ids
    pred      -- predicted class ids
    classes   -- iterable of class names used for the tick labels
    normalize -- if True, show per-row fractions instead of raw counts
    title     -- figure title; empty string falls back to 'Confusion matrix'

    NOTE: this redefines (shadows) the seaborn-based plot_confusion_matrix
    defined earlier in the notebook.
    """
    cm = confusion_matrix(truth, pred)
    if normalize:
        # Row-normalize: each cell becomes the fraction of its true class.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    # FIX: the `title` parameter was accepted but ignored in the original;
    # the empty-string fallback preserves behaviour for existing call sites.
    plt.title(title if title else 'Confusion matrix', size=15)
    plt.colorbar(fraction=0.046, pad=0.04)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    # Annotate every cell; white text on dark cells for contrast.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.grid(False)
    plt.tight_layout()
In [ ]:
pred_classes = np.argmax(model.predict(x_test), axis=-1)
36/36 [==============================] - 1s 14ms/step
In [ ]:
# Uses the second plot_confusion_matrix definition (truth, pred, classes, ...).
plot_confusion_matrix(y_test, pred_classes, encode_dic.keys())
In [ ]:
plot_confusion_matrix(y_test, pred_classes, encode_dic.keys(), normalize=True)

Activation¶

In [ ]:
# Extracts the outputs of the 2 layers:
layer_outputs = [layer.output for layer in model.layers]

# Creates a model that will return these outputs, given the model input:
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)

# Get the outputs of all the hidden nodes for each of the training samples
activations = activation_model.predict(x_train)
# activations[0] is the Bidirectional(SimpleRNN) output — shape (n_samples, 20)
# per the model summary above.
hidden_layer_activation = activations[0]

# Build a dataframe of the node values alongside the true class labels.
# NOTE(review): only the first 10 of the 20 bidirectional units are collected
# below — confirm whether the backward-direction units were meant to be
# included as well.
activation_data = {'actual_class':y_train.reshape(y_train.shape[0],)}
for k in range(0,10):
    activation_data[f"act_val_{k}"] = hidden_layer_activation[:,k]

activation_df = pd.DataFrame(activation_data)
activation_df.head(15).round(3)
59/59 [==============================] - 1s 13ms/step
Out[ ]:
actual_class act_val_0 act_val_1 act_val_2 act_val_3 act_val_4 act_val_5 act_val_6 act_val_7 act_val_8 act_val_9
0 1 0.000 0.828 0.000 0.0 0.000 0.969 0.772 0.000 3.725 0.000
1 2 0.000 0.522 0.000 0.0 0.000 0.620 1.129 0.000 3.765 0.000
2 1 0.000 13.385 1.838 0.0 3.437 0.000 0.000 7.533 0.000 0.000
3 1 0.000 1.389 0.000 0.0 0.000 1.327 2.182 0.000 5.548 0.000
4 2 3.176 0.000 0.000 0.0 0.000 2.348 0.169 0.000 1.149 0.000
5 1 0.000 0.406 0.000 0.0 0.000 0.886 1.191 0.000 2.813 0.000
6 0 0.000 0.038 0.276 0.0 0.000 2.104 1.692 0.000 2.470 0.000
7 1 0.000 0.000 0.000 0.0 0.000 0.186 1.657 0.000 4.021 0.000
8 5 1.273 0.000 0.000 0.0 0.000 1.114 1.911 0.000 2.508 0.000
9 3 0.000 0.976 1.327 0.0 0.000 1.204 1.931 0.000 4.622 0.000
10 2 2.538 0.000 0.000 0.0 0.000 1.660 0.162 0.000 1.591 0.000
11 4 2.649 0.000 0.000 0.0 0.000 1.751 0.279 0.000 1.231 0.000
12 8 0.000 0.000 0.000 0.0 0.000 0.000 0.000 0.000 1.669 0.304
13 4 3.254 0.000 0.000 0.0 0.000 2.271 0.162 0.000 1.329 0.000
14 4 1.938 0.160 0.290 0.0 0.000 1.812 0.274 0.000 1.507 0.000

We get the activation values of the first hidden node and combine them with the corresponding class labels into a DataFrame. We then use seaborn (built on top of matplotlib) to create boxplots from the DataFrame.

In [ ]:
# Box plots of the first hidden node's activations per class, to see how
# strongly this single node separates the surface classes.
# NOTE(review): passing `palette=` without `hue=` is deprecated in recent
# seaborn releases — confirm the installed version before re-running.
plt.figure(figsize=(16,10))
bplot = sns.boxplot(y='act_val_0', x='actual_class',
                 data=activation_df[['act_val_0','actual_class']],
                 width=0.5,
                 palette="colorblind")

Displaying the range of activation values for each class label

In [ ]:
# Per-class [min, max] of the first node's activation values.
activation_df.groupby("actual_class")["act_val_0"].apply(lambda x: [round(min(x.tolist()),2),
 round(max(x.tolist()),2)]).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
Out[ ]:
actual_class range_of_act_values
0 0 [0.0, 4.09]
1 1 [0.0, 3.8]
2 2 [0.0, 3.43]
3 3 [0.0, 3.9]
4 4 [0.0, 4.02]
5 5 [0.0, 3.4]
6 6 [0.0, 0.76]
7 7 [2.56, 3.46]
8 8 [0.0, 4.18]

Select a subset of the activation DataFrame for t-SNE. (An earlier experiment used N=45000; below, N is set to the full training-set size, so the whole frame is used.)

In [ ]:
# N equals the full training-set size, so this "subset" is the whole frame.
N=y_train.shape[0]
activation_df_subset = activation_df.iloc[:N].copy()
activation_df_subset.shape
Out[ ]:
(1866, 11)
In [ ]:
# Separating out the features: drop the first key ('actual_class'), keeping
# only the ten activation columns.
features = [*activation_data][1:] # ['act_val_0', 'act_val_1',...]

data_subset = activation_df_subset[features].values
data_subset.shape
Out[ ]:
(1866, 10)
In [ ]:
%%time
# Project the 10-D activation vectors down to 2-D for visualisation.
# NOTE(review): `n_iter` was renamed to `max_iter` in scikit-learn 1.5 — this
# call targets the older API; confirm before upgrading sklearn.
tsne = TSNE(n_components=2
            ,init='pca'
            ,learning_rate='auto'
            ,verbose=1
            ,perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(data_subset)
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1866 samples in 0.004s...
[t-SNE] Computed neighbors for 1866 samples in 0.138s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1866
[t-SNE] Computed conditional probabilities for sample 1866 / 1866
[t-SNE] Mean sigma: 0.284832
[t-SNE] KL divergence after 250 iterations with early exaggeration: 63.746349
[t-SNE] KL divergence after 300 iterations: 1.275703
CPU times: user 9.82 s, sys: 46.7 ms, total: 9.87 s
Wall time: 6.11 s
In [ ]:
tsne_results = (tsne_results - tsne_results.min()) / (tsne_results.max() - tsne_results.min())
In [ ]:
# Collect the embedding plus both the encoded and human-readable labels.
tsne_results_df = pd.DataFrame(tsne_results, columns=["tsne0", "tsne1"])
tsne_results_df["actual_class"] = activation_df_subset["actual_class"]
tsne_results_df["actual_class_label"] = tsne_results_df["actual_class"].map(decode_dic).astype(str)
In [ ]:
# FIX: the original reused the name `test`, clobbering the submission
# DataFrame loaded earlier in the notebook; use a descriptive name instead.
unique_classes = tsne_results_df["actual_class"].unique()
unique_classes
Out[ ]:
array([1, 2, 0, 5, 3, 4, 8, 6, 7])
In [ ]:
cmap = plt.cm.tab10
plt.figure(figsize=(16,10))
#plt.scatter(tsne_results[:,0],tsne_results[:,1], c=y_train, s=10, cmap=cmap)
# Colour points by encoded class id; the legend is rebuilt from the scatter's
# legend_elements() below, since a Series passed via `label=` is not usable
# as a legend entry directly.
scatter1 = plt.scatter(tsne_results_df["tsne0"], tsne_results_df["tsne1"], c=tsne_results_df["actual_class"], s=10, cmap=cmap, label=tsne_results_df["actual_class_label"])

# image_positions = np.array([[1., 1.]])
# for index, position in enumerate(tsne_results):
#     dist = np.sum((position - image_positions) ** 2, axis=1)
#     if np.min(dist) > 0.02: # if far enough from other images
#         image_positions = np.r_[image_positions, [position]]
#         imagebox = mpl.offsetbox.AnnotationBbox(
#             mpl.offsetbox.OffsetImage(np.reshape(x_train[index], (32, 32, 3))),
#             position, bboxprops={"edgecolor": cmap(y_train[index]), "lw": 2})
#         plt.gca().add_artist(imagebox)
plt.legend(*scatter1.legend_elements())

plt.axis("off")

plt.show()

Lime explanation¶

https://github.com/marcotcr/lime/blob/master/doc/notebooks/Lime%20with%20Recurrent%20Neural%20Networks.ipynb

https://lime-ml.readthedocs.io/en/latest/lime.html

In [ ]:
explainer = lime_tabular.RecurrentTabularExplainer(x_train, training_labels=y_train, feature_names=feature_names)
In [ ]:
# Explain the model's output for the first test sample, for class 6
# (carpet, per decode_dic).
exp = explainer.explain_instance(x_test[0], model.predict, num_features=10, labels=(6,))
exp.show_in_notebook()
157/157 [==============================] - 4s 27ms/step
In [ ]:
# The ground-truth label for this sample.
answer = y_test[0][0]

print(answer, ":", decode_dic[answer])
6 : carpet
In [ ]:
# NOTE: the cells below repeat the explanation pattern above for test samples
# 1-4, with `labels` set to each sample's true class. TODO(review): this
# copy-pasted block could become a helper function taking the sample index.
exp = explainer.explain_instance(x_test[1], model.predict, num_features=10, labels=(6,))
exp.show_in_notebook()
157/157 [==============================] - 2s 12ms/step
In [ ]:
answer = y_test[1][0]

print(answer, ":", decode_dic[answer])
6 : carpet
In [ ]:
exp = explainer.explain_instance(x_test[2], model.predict, num_features=10, labels=(6,))
exp.show_in_notebook()
157/157 [==============================] - 2s 12ms/step
In [ ]:
answer = y_test[2][0]

print(answer, ":", decode_dic[answer])
6 : carpet
In [ ]:
exp = explainer.explain_instance(x_test[3], model.predict, num_features=10, labels=(1,))
exp.show_in_notebook()
157/157 [==============================] - 2s 13ms/step
In [ ]:
answer = y_test[3][0]

print(answer, ":", decode_dic[answer])
1 : concrete
In [ ]:
exp = explainer.explain_instance(x_test[4], model.predict, num_features=10, labels=(4,))
exp.show_in_notebook()
157/157 [==============================] - 4s 28ms/step
In [ ]:
answer = y_test[4][0]

print(answer, ":", decode_dic[answer])
4 : soft_pvc
In [ ]: